package extractor;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts the inner content of HTML paragraph ({@code p}) elements from a
 * string using a regular expression.
 *
 * <p>This is a lightweight regex scan, not an HTML parser: nested or unclosed
 * paragraph tags are not handled specially, and the captured content is
 * returned verbatim (inline markup inside a paragraph is not stripped).
 *
 * <p>Instances hold no mutable state; the compiled pattern is shared and a
 * fresh {@link Matcher} is created for every call.
 */
public class TextExtractor {
	/**
	 * Regex for a single paragraph element.
	 * <ul>
	 * <li>{@code (?i)} - tag names are matched case-insensitively.</li>
	 * <li>{@code (?s)} - DOTALL, so paragraph content may span several lines.</li>
	 * <li>{@code <p(?:\s[^>]*)?>} - opening tag with optional attributes; the
	 * mandatory whitespace before attributes keeps tags such as
	 * {@code <pre>} or {@code <param>} from matching.</li>
	 * <li>{@code (.*?)} - reluctant capture, so each match stops at the first
	 * closing tag instead of running on to a later one.</li>
	 * </ul>
	 */
	private static final String HTML_P_PATTERN = "(?is)<p(?:\\s[^>]*)?>(.*?)</p>";

	/** Compiled once and shared by all instances. */
	private static final Pattern PATTERN_TAG = Pattern.compile(HTML_P_PATTERN);

	/** Creates an extractor. */
	public TextExtractor() {
	}

	/**
	 * Returns the content of every non-empty paragraph element found in the
	 * given HTML, in document order.
	 *
	 * @param html the markup to scan; may be {@code null}
	 * @return a new mutable list with the captured content of each paragraph;
	 *         empty when {@code html} is {@code null} or contains no
	 *         non-empty paragraph
	 */
	public List<String> grabText(final String html) {
		List<String> paragraphs = new ArrayList<String>();
		if (html == null) {
			return paragraphs;
		}

		// The matcher is local so that calls do not share state.
		Matcher matcher = PATTERN_TAG.matcher(html);
		while (matcher.find()) {
			String content = matcher.group(1);
			// Empty paragraphs are matched (so they cannot swallow the next
			// paragraph) but contribute nothing to the result.
			if (!content.isEmpty()) {
				paragraphs.add(content);
			}
		}

		return paragraphs;
	}
}
